{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Regression Metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_boston\n",
    "boston = load_boston()\n",
    "\n",
    "X = boston.data\n",
    "y = boston.target\n",
    "\n",
    "from sklearn.model_selection import train_test_split, cross_val_score\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.46455839325055914"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.neighbors import KNeighborsRegressor\n",
    "from sklearn.model_selection import RandomizedSearchCV\n",
    "\n",
    "knn_reg = KNeighborsRegressor()\n",
    "param_dist = {'n_neighbors': list(range(3,20,1))}\n",
    "rs = RandomizedSearchCV(knn_reg,param_dist,cv=10,n_iter=17)\n",
    "rs.fit(X_train, y_train)\n",
    "rs.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7439511908709866"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import Ridge\n",
    "cross_val_score(Ridge(),X_train,y_train,cv=10).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.83924130876806269"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor\n",
    "cross_val_score(GradientBoostingRegressor(max_depth=7),X_train,y_train,cv=10).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.83391264819151067"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cross_val_score(RandomForestRegressor(),X_train,y_train,cv=10).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomizedSearchCV(cv=None, error_score='raise',\n",
       "          estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,\n",
       "             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,\n",
       "             max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
       "             min_impurity_split=None, min_samples_leaf=1,\n",
       "             min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
       "             n_estimators=100, presort='auto', random_state=None,\n",
       "             subsample=1.0, verbose=0, warm_start=False),\n",
       "          fit_params=None, iid=True, n_iter=5, n_jobs=-1,\n",
       "          param_distributions={'n_estimators': [4000], 'learning_rate': [0.01], 'max_depth': [1, 2, 3, 5, 7]},\n",
       "          pre_dispatch='2*n_jobs', random_state=None, refit=True,\n",
       "          return_train_score=True, scoring=None, verbose=0)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "param_dist = {'n_estimators': [4000], 'learning_rate': [0.01], 'max_depth':[1,2,3,5,7]}\n",
    "rs_inst_a = RandomizedSearchCV(GradientBoostingRegressor(), param_dist, n_iter = 5, n_jobs=-1)\n",
    "rs_inst_a.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8841975870503056"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rs_inst_a.best_params_\n",
    "{'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 4000}\n",
    "rs_inst_a.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def mape_score(y_test, y_pred):\n",
    "    return (np.abs(y_test - y_pred)/y_test).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from __future__ import division\n",
    "from numba import autojit\n",
    "\n",
    "@autojit\n",
    "def mape_score(y_test, y_pred):\n",
    "    sum_total = 0\n",
    "    y_vec_length = len(y_test)\n",
    "    for index in range(y_vec_length):\n",
    "        sum_total += (1 - (y_pred[index]/y_test[index]))\n",
    " \n",
    "    return sum_total/y_vec_length\n",
    "\n",
    "from sklearn.metrics import make_scorer\n",
    "mape_scorer = make_scorer(mape_score, greater_is_better=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.021086502313661441"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "param_dist = {'n_estimators': [4000], 'learning_rate': [0.01], 'max_depth':[1,2,3,4,5]}\n",
    "rs_inst_b = RandomizedSearchCV(GradientBoostingRegressor(), param_dist, n_iter = 3, n_jobs=-1,scoring = mape_scorer)\n",
    "rs_inst_b.fit(X_train, y_train)\n",
    "rs_inst_b.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'learning_rate': 0.01, 'max_depth': 1, 'n_estimators': 4000}"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rs_inst_b.best_params_"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
